
--**********************************************************************
-- 
-- SQL functions for networkflow-based conflation models for the paper:
--  
--
-- Copyright (C) 2020  Ting Lei <leiting@gmail.com>
--
-- This is free software; you can redistribute and/or modify it under
-- the terms of the GNU General Public Licence as published
-- by the Free Software Foundation.
-- See the LICENSE file for more information. *

--**********************************************************************

CREATE SCHEMA IF NOT EXISTS cfn;


-- match function between entities using a fixed cost double-matching model, called fc-bimatching in the IJGIS paper
CREATE OR REPLACE FUNCTION cfn.model_fcdblmatching(
  _ent_tbl_l regclass, _ent_tbl_r regclass
  , _cutoff FLOAT8  DEFAULT '+infinity'::FLOAT8
  , _fixed_cost FLOAT8  DEFAULT 100
  , _penalty_factor FLOAT8 DEFAULT 0.5
)
  RETURNS boolean AS
$func$
DECLARE
BEGIN
  
  --create distance matrix
  EXECUTE format($fmt$
DROP TABLE IF EXISTS %4$s;                              
CREATE TABLE %4$s (entid1 BIGINT, entid2 BIGINT, dist FLOAT8, dist_r FLOAT8);                              
SELECT f.entid entid1, t.entid entid2, %3$s(f.geom, t.geom) dist, %3$s(t.geom, f.geom) dist_r 
FROM %1$s f, %2$s t 
WHERE %3$s(f.geom, t.geom) < $1
ORDER BY f.entid, t.entid
;
  $fmt$, _ent_tbl_l, _ent_tbl_r,'ST_DirectedHausdorffdistance', '_tt_fcdblm_dm') USING _cutoff;
  
  -- form a mincost_circulation input table
  DROP TABLE IF EXISTS I, J, II, JJ, c, E_ba, E_entry, E_excess, E_assign, E_count;
  CREATE TABLE I(i) AS
  (SELECT 'i' || entid1 FROM _tt_fcdblm_dm )
  ;CREATE TABLE J(j) AS
  (SELECT 'j' || entid2 FROM _tt_fcdblm_dm )
  ;CREATE TABLE II(ii) AS
  (SELECT 'ii' || entid1 FROM _tt_fcdblm_dm )
  ;CREATE TABLE JJ(jj) AS
  (SELECT 'jj' || entid2 FROM _tt_fcdblm_dm )
  ;CREATE TABLE c AS
  (SELECT 's' s_, 't' t_, 'r' r_, 'o' o_, 'a' a_, 'b' b_, 'e' e_
          , ((SELECT count(*) FROM I)+(SELECT count(*) FROM J))*2 bigM
          , (SELECT greatest(max(dist), max(dist_r)) FROM _tt_fcdblm_dm) max_d 
  );
  
  -- create edge table
  CREATE TABLE E_ba (origin, destination, capacity, lowerbound, cost, value, vname) AS
  (
    SELECT b_::text, a_::text, bigM::BIGINT, 0::BIGINT, - _fixed_cost::FLOAT8, 0::BIGINT, '' FROM c
  );
  CREATE TABLE E_entry (origin, destination, capacity, lowerbound, cost, value, vname) AS 
  (
    SELECT a_::text, s_::text, bigM::BIGINT, 0::BIGINT, 0::FLOAT8, 0::BIGINT, '' FROM c UNION
    SELECT a_, r_, bigM, 0, 0, 0, '' FROM c UNION
    SELECT t_, b_, bigM, 0, 0, 0, '' FROM c UNION
    SELECT o_, b_, bigM, 0, 0, 0, '' FROM c
  );
  
  CREATE TABLE E_excess (origin, destination, capacity, lowerbound, cost, value, vname) AS
  (
    SELECT j , e_, bigM::BIGINT, 0::BIGINT, 0::FLOAT8, 0, '' FROM c, J UNION
    SELECT ii, e_, bigM, 0, 0, 0, '' FROM c,II UNION
    SELECT e_, b_, bigM, 0, _penalty_factor * c.max_d, 0, '' FROM c
  );
  CREATE TABLE E_assign (origin, destination, capacity, lowerbound, cost, value, vname) AS 
  (
    SELECT 'i'  || entid1, 'j'  || entid2, 1::BIGINT, 0::BIGINT, dist::FLOAT8  , 0::BIGINT, '' FROM _tt_fcdblm_dm UNION
    SELECT 'jj' || entid2, 'ii' || entid1, 1, 0, dist_r, 0, '' FROM _tt_fcdblm_dm
  )
  ;CREATE TABLE E_count (origin, destination, capacity, lowerbound, cost, value, vname) AS
  (
    SELECT s_, i , 1::BIGINT, 0::BIGINT, 0::FLOAT8, 0::BIGINT, '' FROM c, I UNION
    SELECT j , t_, 1, 0, 0, 0, '' FROM c, J UNION
    SELECT r_, jj, 1, 0, 0, 0, '' FROM c,JJ UNION
    SELECT ii, o_, 1, 0, 0, 0, '' FROM c,II 
  )
  ;
  DROP TABLE IF EXISTS _tt_fcdblm_network;
  CREATE TABLE _tt_fcdblm_network AS  
  TABLE e_ba UNION TABLE E_entry UNION TABLE E_excess UNION TABLE E_assign UNION TABLE E_count
    ;
  
  DROP TABLE IF EXISTS _tt_fcdblm_nw_res;
  CREATE TABLE _tt_fcdblm_nw_res AS
  SELECT * FROM mincost_circulation('TABLE _tt_fcdblm_network') t; 
  
  --write model results to match pair by joining with E_assign
  EXECUTE format(
    $fmt$
    UPDATE %1$s SET m_entid = NULL;
    UPDATE %2$s SET m_entid = NULL;
    UPDATE %1$s i SET m_entid = trim(LEADING 'j' FROM res.destination)::BIGINT
    FROM _tt_fcdblm_nw_res res WHERE 'i'  || i.entid = res.origin AND res.value > 0.999;
    UPDATE %2$s j SET m_entid = trim(LEADING 'ii' FROM res.destination)::BIGINT
    FROM _tt_fcdblm_nw_res res WHERE 'jj' || j.entid = res.origin AND res.value > 0.999;
    $fmt$, _ent_tbl_l, _ent_tbl_r);  
  
  RETURN True;
END $func$ LANGUAGE plpgsql;

--e.g.
-- SELECT * FROM cfn.model_fcdblmatching('cfl.ex_part3_f__mt','cfl.ex_part3_t__mt');
-- SELECT * FROM cfn.model_fcdblmatching('cfl.ex_part3_f__mt','cfl.ex_part3_t__mt', 100, 100, 0.5);


-- match function between entities using the (undirected) the fc-matching model in the IJGIS paper
CREATE OR REPLACE FUNCTION cfn.model_fcmatching(
  _ent_tbl_l regclass, _ent_tbl_r regclass
  , _cutoff FLOAT8  DEFAULT '+infinity'::FLOAT8
  , _fixed_cost FLOAT8  DEFAULT 100
)
  RETURNS boolean AS
$func$
DECLARE
BEGIN
  
  --create distance matrix
  EXECUTE format($fmt$
DROP TABLE IF EXISTS %4$s;                              
CREATE TABLE %4$s (entid1 BIGINT, entid2 BIGINT, dist FLOAT8, dist_r FLOAT8);                              
SELECT f.entid entid1, t.entid entid2, %3$s(f.geom, t.geom) dist, %3$s(t.geom, f.geom) dist_r 
FROM %1$s f, %2$s t 
WHERE %3$s(f.geom, t.geom) < $1
ORDER BY f.entid, t.entid
;
  $fmt$, _ent_tbl_l, _ent_tbl_r,'ST_DirectedHausdorffdistance', '_tt_mfcm_dm') USING _cutoff;
  
  -- form a mincost_maxflow input table
  DROP TABLE IF EXISTS I, J, c, E_ba, E_assign, E_count;
  CREATE TABLE I(i) AS
  (SELECT 'i' || entid1 FROM _tt_mfcm_dm )
  ;CREATE TABLE J(j) AS
  (SELECT 'j' || entid2 FROM _tt_mfcm_dm )
  ;CREATE TABLE c AS
  (SELECT 's' s_, 't' t_,
          ((SELECT count(*) FROM I)+(SELECT count(*) FROM J))*2 bigM
  );
  CREATE TABLE E_ba (origin, destination, capacity, lowerbound, cost, value, vname) AS
  (
    SELECT t_::text, s_::text, bigM::BIGINT, 0::BIGINT, - _fixed_cost::FLOAT8, 0::BIGINT, '' FROM c
  );
  CREATE TABLE E_count (origin, destination, capacity, lowerbound, cost, value, vname) AS
  (
    SELECT s_, i , 1::BIGINT, 0::BIGINT, 0::FLOAT8, 0::BIGINT, '' FROM c, I UNION
    SELECT j , t_, 1, 0, 0, 0, '' FROM c, J 
  );
  
  CREATE TABLE E_assign (origin, destination, capacity, lowerbound, cost, value, vname) AS 
  (
    SELECT 'i'  || entid1, 'j'  || entid2, 1::BIGINT, 0::BIGINT, greatest(dist, dist_r)::FLOAT8, 0::BIGINT, '' FROM _tt_mfcm_dm
  )
  ;
  DROP TABLE IF EXISTS _tt_mfcm_network;
  CREATE TABLE _tt_mfcm_network AS  
  TABLE E_ba UNION TABLE E_assign UNION TABLE E_count;
  
  DROP TABLE IF EXISTS _tt_mfcm_nw_res;
  CREATE TABLE _tt_mfcm_nw_res AS
    SELECT * FROM mincost_circulation('TABLE _tt_mfcm_network') t;
 
  --write model results to match pair by joining with E_assign
  EXECUTE format(
    $fmt$
    UPDATE %1$s SET m_entid = NULL;
    UPDATE %2$s SET m_entid = NULL;
    UPDATE %1$s i SET m_entid = trim(LEADING 'j' FROM res.destination)::BIGINT
    FROM _tt_mfcm_nw_res res WHERE 'i'  || i.entid = res.origin AND res.value > 0.999;
    UPDATE %2$s j SET m_entid = trim(LEADING 'i' FROM res.origin)::BIGINT
    FROM _tt_mfcm_nw_res res WHERE 'j' || j.entid = res.destination AND res.value > 0.999;
    $fmt$, _ent_tbl_l, _ent_tbl_r);  
  
  RETURN True;
END $func$ LANGUAGE plpgsql;

--e.g.
-- SELECT * FROM cfn.model_fcmatching('cfl.ex_part3_f__mt','cfl.ex_part3_t__mt');
-- SELECT * FROM cfn.model_fcdblmatching('cfl.ex_part3_f__mt','cfl.ex_part3_t__mt', 100, 100);
